osuRepo = 'http://ftp.osuosl.org/pub/cran/'
if(!require(tidyverse)){
install.packages('tidyverse',repos=osuRepo)
}
if(!require(caret)){
install.packages('caret',repos=osuRepo)
}## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'default/
## America/Los_Angeles'
if(!require(caTools)){
install.packages('caTools',repos=osuRepo)
}
if(!require(reshape2)){
install.packages('reshape2',repos=osuRepo)
}
source('config.R')file_list = list.files(confoundDir, pattern = 'confounds.tsv', recursive = TRUE)
if (!file.exists("~/Documents/code/auto-motion-fmriprep/tag.csv")) {
for (file in file_list){
# if the merged dataset doesn't exist, create it
if (!exists('dataset')){
filePattern = paste(subPattern, wavePattern, taskPattern, runPattern, 'bold_confounds.tsv', sep = "_")
dataset = read_tsv(paste0(confoundDir, file)) %>%
mutate(file = file) %>%
extract(file, c('sub', 'wave', 'task', 'run'),
file.path('sub-.*','ses-wave.*', 'func', filePattern)) %>%
mutate(wave = as.integer(wave),
run = as.integer(run),
stdDVARS = as.numeric(ifelse(stdDVARS %in% "n/a", NA, stdDVARS)),
`non-stdDVARS` = as.numeric(ifelse(`non-stdDVARS` %in% "n/a", NA, `non-stdDVARS`)),
`vx-wisestdDVARS` = as.numeric(ifelse(`vx-wisestdDVARS` %in% "n/a", NA, `vx-wisestdDVARS`)),
FramewiseDisplacement = as.numeric(ifelse(FramewiseDisplacement %in% "n/a", NA, FramewiseDisplacement)),
volume = row_number()) %>%
select(sub, wave, task, run, volume, everything())
}
# if the merged dataset does exist, append to it
else {
filePattern = paste(subPattern, wavePattern, taskPattern, runPattern, 'bold_confounds.tsv', sep = "_")
tmp = read_tsv(paste0(confoundDir, file)) %>%
mutate(file = file) %>%
extract(file, c('sub', 'wave', 'task', 'run'),
file.path('sub-.*','ses-wave.*', 'func', filePattern)) %>%
mutate(wave = as.integer(wave),
run = as.integer(run),
stdDVARS = as.numeric(ifelse(stdDVARS %in% "n/a", NA, stdDVARS)),
`non-stdDVARS` = as.numeric(ifelse(`non-stdDVARS` %in% "n/a", NA, `non-stdDVARS`)),
`vx-wisestdDVARS` = as.numeric(ifelse(`vx-wisestdDVARS` %in% "n/a", NA, `vx-wisestdDVARS`)),
FramewiseDisplacement = as.numeric(ifelse(FramewiseDisplacement %in% "n/a", NA, FramewiseDisplacement)),
volume = row_number()) %>%
select(sub, wave, task, run, volume, everything())
dataset = bind_rows(dataset, tmp)
rm(tmp)
}
}
} else {
dataset = read.csv("~/Documents/code/auto-motion-fmriprep/tag.csv", stringsAsFactors = FALSE)
}For TAG, we don’t have hand coded data so we’re just going to make up whether the volume is coded as trash or not
trash = data.frame(trash = rbinom(nrow(dataset), 1, .01))
dataset = bind_cols(dataset, trash) %>%
select(sub, wave, task, run, volume, trash, everything())data.plot = dataset %>%
gather(feature, value, -c(sub, wave, task, run, volume, trash))
# ggplot(data.plot, aes(1, value)) +
# geom_boxplot() +
# facet_wrap(~feature, scales = "free", ncol = 4)
ggplot(data.plot, aes(value)) +
geom_density(fill = "pink") +
facet_wrap(~feature, scales = "free", ncol = 4)## Warning: Removed 868411 rows containing non-finite values (stat_density).
features = c("CSF", "WhiteMatter", "GlobalSignal", "FramewiseDisplacement", "stdDVARS", "tCompCor00", "aCompCor00")
subs = unique(dataset$sub)[1:5]
data.sub = data.plot %>%
filter(feature %in% features) %>%
mutate(sort = ifelse(feature == "CSF", 1,
ifelse(feature == "GlobalSignal", 2,
ifelse(feature == "WhiteMatter", 3,
ifelse(feature == "FramewiseDisplacement", 4,
ifelse(feature == "stdDVARS", 5,
ifelse(feature == "tCompCor00", 6,
ifelse(feature == "aCompCor00", 7, NA)))))))) %>%
filter(sub %in% subs)
# ggplot(data.sub, aes(x = volume, y = value)) +
# geom_line(aes(color = feature), size = .25) +
# facet_wrap(reorder(feature, sort) ~ task + run, ncol = 4, scales = "free") +
# scale_color_manual(values = wesanderson::wes_palette("Zissou", 7, type = "continuous"))
nada = data.sub %>%
group_by(sub) %>%
do({
plot = ggplot(., aes(volume, value)) +
geom_line(aes(color = feature), size = .25, show.legend = FALSE) +
facet_wrap(reorder(feature, sort) ~ task + run, ncol = 4, scales = "free") +
scale_color_manual(values = wesanderson::wes_palette("Zissou", 7, type = "continuous"))
labs(title = .$sub[[1]])
print(plot)
#ggsave(plot, file = paste0(plotDir,.$sub[[1]],'.pdf'), height = 10, width = 12)
data.frame()
})## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_path).
# define color palette
palette = c("#0071CF", "#FFD700", "#E2261C")
# subset and matrix-ify
dataset.corr = dataset %>%
select(-c(sub, wave, task, run, volume, trash, starts_with("NonSteady"))) %>%
cor(., use = "pairwise.complete.obs") %>%
melt(.)
# plot the data
ggplot(dataset.corr, aes(as.factor(Var2), as.factor(Var1), fill = value)) +
geom_tile(color = "white")+
scale_fill_gradient2(low = palette[1], high = palette[3], mid = "white",
midpoint = 0, limit = c(-1,1), space = "Lab",
name="pearson\ncorrelation") +
labs(x = "",
y = "") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, size = 10, hjust = 1)) +
geom_text(aes(label = round(value,1)), size = 2.5)set.seed(101)
sample = sample.split(dataset$trash, SplitRatio = .75)
training = subset(dataset, sample == TRUE)
testing = subset(dataset, sample == FALSE)# train.svm = training %>%
# select(-c(sub, wave, task, run, volume, starts_with("NonSteady"))) %>%
# mutate(trash = ifelse(trash == 1, "yes","no"),
# trash = as.factor(trash))
#
# # specify control parameters
# fitControl = trainControl(method = "repeatedcv", number = 10, repeats = 3, classProbs = TRUE)
#
# # run initial model and print
# set.seed(101)
# svmFit = train(trash ~ .,
# data = train.svm,
# method = "svmLinear",
# trControl = fitControl,
# preProcess = c("center", "scale"),
# tuneLength = 10,
# metric = "ROC",
# verbose = FALSE,
# na.action = na.pass)
# svmFit$finalModel
#
# # predict model
# train_pred = predict(svmFit, newdata = train.svm, type="prob") %>%
# select(-no)
#
# # plot cutoff v. accuracy
# predicted = prediction(train_pred, train.svm$artifact, label.ordering = NULL)
# perf = performance(predicted, measure = "acc")
# perf.df = data.frame(cut=perf@x.values[[1]],acc=perf@y.values[[1]])
#
# ggplot(perf.df, aes(cut, acc)) +
# geom_line()
#
# # plot false v. true positive rate
# perf = performance(predicted, measure = "tpr", x.measure = "fpr")
# perf.df = data.frame(cut=perf@alpha.values[[1]],fpr=perf@x.values[[1]],tpr=perf@y.values[[1]])
#
# ggplot(perf.df, aes(fpr, tpr)) +
# geom_line()
#
# # plot specificity v. sensitivity
# perf = performance(predicted, measure = "sens", x.measure = "spec")
# perf.df = data.frame(cut=perf@alpha.values[[1]],sens=perf@x.values[[1]],spec=perf@y.values[[1]])
# ggplot(perf.df, aes(spec, sens)) +
# geom_line()
#
# ggplot(perf.df, aes(x = cut)) +
# geom_line(aes(y = sens, color = "sensitivity")) +
# geom_line(aes(y = spec, color = "specificity"))
#
# perf@alpha.values[[1]][which.max(perf@x.values[[1]]+perf@y.values[[1]])] # cut
# max(perf@x.values[[1]]+perf@y.values[[1]]) # sensitivity + specificity
#
# # cut and assess accuracy in training sample
# train_pred = predict(svmFit, newdata = train.svm, type="prob") %>%
# select(-no)
# train_pred = as.matrix(train_pred)
# train_pred[train_pred > .09] = "yes"
# train_pred[train_pred < .09] = "no"
# confusionMatrix(train_pred, train.svm$artifact)# test.svm = testing %>%
# select(-c(sub, wave, task, run, volume, starts_with("NonSteady"))) %>%
# mutate(trash = ifelse(trash == 1, "yes","no"),
# trash = as.factor(trash))
#
# # cut and assess accuracy in test sample
# test_pred = predict(svmFit, newdata = test.svm, type="prob") %>%
# select(-no)
# test_pred = as.matrix(test_pred)
# test_pred[test_pred > .09] = "yes"
# test_pred[test_pred < .09] = "no"
# confusionMatrix(test_pred, test.svm$artifact)